Common function definitions:
plot_correlation <- function(dataset) {
  #' Draw a correlation heat map for a dataset.
  #'
  #' The pairwise correlation matrix of the columns is computed with `cor()`,
  #' rendered with `corrplot()` and then handed back to the caller.
  #' @param dataset Data.frame whose columns are correlated with each other
  #' @return The correlation matrix produced by `cor(dataset)`
  correlations <- cor(dataset)
  heat_colours <- brewer.pal(n = 8, name = "RdYlBu")
  corrplot(
    correlations,
    type = "full",
    order = "original",
    tl.cex = .6,
    addCoefasPercent = TRUE,
    col = heat_colours
  )
  return(correlations)
}
# General EDA
describe_df <- function(name, df) {
  #' Print a short exploratory summary of a data frame.
  #'
  #' Shows a header line, the structure (`str()`), the descriptive statistics
  #' (`summary()`) and reports how many NA values the data frame holds.
  #' @param name Label shown in the header line
  #' @param df Data.frame to describe
  #' @return Character string reporting the number of NA values in `df`

  # Values are only auto-printed at top level, never inside a function body,
  # so the header and the summary table have to be printed explicitly.
  print(paste("EDA for ", name, ":"))
  str(df)
  print(summary(df))
  # Returned visibly, so a top-level call still prints the NA count.
  paste("Number of NA values: ", sum(is.na(df)))
}
#### Preprocessing functions
remove_highly_correlated_features <- function(df, cutoff = 0.9) {
  #' Drop features that are highly correlated with other features.
  #'
  #' The correlation matrix of `df` is passed to `findCorrelation()` and the
  #' columns it reports are removed.
  #' @param df Data.frame to clean; must be accepted by `cor()`
  #' @param cutoff Correlation cutoff handed to `findCorrelation()`;
  #'   defaults to 0.9, the value that used to be hard-coded
  #' @return `df` without the columns reported by `findCorrelation()`
  corr_data <- cor(df)
  # `exact` is a logical flag. The column count that used to be passed here
  # was always non-zero, which amounts to TRUE, so say TRUE explicitly.
  high_corr_cols <- findCorrelation(
    corr_data,
    cutoff = cutoff,
    verbose = FALSE,
    names = FALSE,
    exact = TRUE
  )
  # Nothing to remove when no column exceeds the cutoff.
  if (length(high_corr_cols) > 0) {
    df[high_corr_cols] <- NULL
  }
  return(df)
}
remove_nzv <- function(df) {
  #' Remove near-zero-variance features from a data frame.
  #'
  #' Prints the structure of the flagged column positions and of the cleaned
  #' data frame along the way.
  #' @param df Data.frame to clean
  #' @return `df` without the columns flagged by `nearZeroVar()`

  # nearZeroVar() with saveMetrics = FALSE returns a vector of column positions
  nzv <- nearZeroVar(df, saveMetrics = FALSE)
  str(nzv)
  # Negative indexing with an empty vector selects *no* columns
  # (df[, -integer(0)] has zero columns), so only subset when something was
  # flagged. drop = FALSE keeps the result a data frame even when a single
  # column is left.
  if (length(nzv) > 0) {
    df_new <- df[, -nzv, drop = FALSE]
  } else {
    df_new <- df
  }
  str(df_new)
  return(df_new)
}
#### Execute in parallel
run_in_parallel <- function(FUN, ...) {
  #' Run a function while a parallel cluster is registered.
  #'
  #' A cluster is created and registered with `registerDoParallel()`, `FUN`
  #' is called with the remaining arguments, and the cluster is stopped
  #' again before returning.
  #' @param FUN Function to call
  #' @param ... Arguments forwarded unchanged to `FUN`
  #' @return Whatever `FUN(...)` returns

  # Find how many cores are on your machine
  num_cores <- detectCores() # Result = Typically 4 to 6
  # Don't use them all! Your computer is running other processes. Leave two
  # cores free, but never ask for fewer than one worker: detectCores() can
  # return NA, and machines with two cores or fewer would otherwise request
  # a cluster of size zero or less.
  num_workers <- if (is.na(num_cores)) 1L else max(1L, num_cores - 2L)
  cl <- makeCluster(num_workers)
  # Stop the cluster when leaving the function, even if FUN fails, so no
  # worker processes are left behind.
  on.exit(stopCluster(cl), add = TRUE)
  # Register Cluster
  registerDoParallel(cl)
  result <- FUN(...)
  return(result)
}
# Load training datasets, one is for iPhone labeled sentiment, and the other one for the Samsung Galaxy phone.
Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
Explore structure and descriptive statistics from the training datasets
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 12973 obs. of 59 variables:
$ iphone : num 1 1 1 1 1 41 1 1 1 1 ...
$ samsunggalaxy : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonyxperia : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokialumina : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcphone : num 0 0 0 0 0 0 0 0 0 0 ...
$ ios : num 0 0 0 0 0 6 0 0 0 0 ...
$ googleandroid : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecampos : num 0 0 0 0 0 1 1 0 0 0 ...
$ samsungcampos : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonycampos : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacampos : num 0 0 0 0 0 0 0 0 0 0 ...
$ htccampos : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamneg : num 0 0 0 0 0 3 1 0 0 0 ...
$ samsungcamneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ htccamneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamunc : num 0 0 0 0 0 7 1 0 0 0 ...
$ samsungcamunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ htccamunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedispos : num 0 0 0 0 0 1 13 0 0 0 ...
$ samsungdispos : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonydispos : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadispos : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcdispos : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedisneg : num 0 0 0 0 0 3 10 0 0 0 ...
$ samsungdisneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedisunc : num 0 0 0 0 0 4 9 0 0 0 ...
$ samsungdisunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperpos : num 0 1 0 1 1 0 5 3 0 0 ...
$ samsungperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperneg : num 0 0 0 0 0 0 4 1 0 0 ...
$ samsungperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperunc : num 0 0 0 1 0 0 5 0 0 0 ...
$ samsungperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ htcperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ iosperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ googleperpos : num 0 0 0 0 0 0 0 0 0 0 ...
$ iosperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ googleperneg : num 0 0 0 0 0 0 0 0 0 0 ...
$ iosperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ googleperunc : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonesentiment: num 0 0 0 0 0 4 4 0 0 0 ...
- attr(*, "spec")=
.. cols(
.. iphone = col_double(),
.. samsunggalaxy = col_double(),
.. sonyxperia = col_double(),
.. nokialumina = col_double(),
.. htcphone = col_double(),
.. ios = col_double(),
.. googleandroid = col_double(),
.. iphonecampos = col_double(),
.. samsungcampos = col_double(),
.. sonycampos = col_double(),
.. nokiacampos = col_double(),
.. htccampos = col_double(),
.. iphonecamneg = col_double(),
.. samsungcamneg = col_double(),
.. sonycamneg = col_double(),
.. nokiacamneg = col_double(),
.. htccamneg = col_double(),
.. iphonecamunc = col_double(),
.. samsungcamunc = col_double(),
.. sonycamunc = col_double(),
.. nokiacamunc = col_double(),
.. htccamunc = col_double(),
.. iphonedispos = col_double(),
.. samsungdispos = col_double(),
.. sonydispos = col_double(),
.. nokiadispos = col_double(),
.. htcdispos = col_double(),
.. iphonedisneg = col_double(),
.. samsungdisneg = col_double(),
.. sonydisneg = col_double(),
.. nokiadisneg = col_double(),
.. htcdisneg = col_double(),
.. iphonedisunc = col_double(),
.. samsungdisunc = col_double(),
.. sonydisunc = col_double(),
.. nokiadisunc = col_double(),
.. htcdisunc = col_double(),
.. iphoneperpos = col_double(),
.. samsungperpos = col_double(),
.. sonyperpos = col_double(),
.. nokiaperpos = col_double(),
.. htcperpos = col_double(),
.. iphoneperneg = col_double(),
.. samsungperneg = col_double(),
.. sonyperneg = col_double(),
.. nokiaperneg = col_double(),
.. htcperneg = col_double(),
.. iphoneperunc = col_double(),
.. samsungperunc = col_double(),
.. sonyperunc = col_double(),
.. nokiaperunc = col_double(),
.. htcperunc = col_double(),
.. iosperpos = col_double(),
.. googleperpos = col_double(),
.. iosperneg = col_double(),
.. googleperneg = col_double(),
.. iosperunc = col_double(),
.. googleperunc = col_double(),
.. iphonesentiment = col_double()
.. )
[1] "Number of NA values: 0"
Explore correlation between all variables:
[1] "Number of original features: 59"
[1] "Number of features after cleanup: 46"
iphone samsunggalaxy sonyxperia nokialumina googleandroid iphonecampos
iphone 1.000000000 0.0197858228 -0.011617908 -0.0134231660 0.1075300211 0.078157326
samsunggalaxy 0.019785823 1.0000000000 0.366670822 -0.0060880044 0.2361624448 0.030556160
sonyxperia -0.011617908 0.3666708219 1.000000000 -0.0063498609 -0.0182884699 0.005067537
nokialumina -0.013423166 -0.0060880044 -0.006349861 1.0000000000 -0.0011148600 0.029824073
googleandroid 0.107530021 0.2361624448 -0.018288470 -0.0011148600 1.0000000000 0.104419788
iphonecampos 0.078157326 0.0305561602 0.005067537 0.0298240731 0.1044197879 1.000000000
samsungcampos 0.057395445 0.2521205947 0.050139994 0.0092986543 0.3154874208 0.062438375
sonycampos -0.004593694 0.1459691107 0.396750611 -0.0027543180 -0.0002056770 0.045008555
nokiacampos -0.008439469 -0.0004004217 -0.004231735 0.7004150055 0.0032844953 0.030816940
htccampos 0.022717373 0.0652742531 0.016506652 0.0212952463 0.1480948517 0.623911948
iphonecamneg 0.490523588 0.1260628010 -0.006715050 0.0632452640 0.3918021616 0.541339970
samsungcamneg 0.142552547 0.3429191699 -0.004308481 0.0095460787 0.7114026076 0.117450549
sonycamneg -0.001830062 0.0318207535 0.345295791 -0.0012288604 0.0135394518 0.019994295
htccamneg 0.104612765 0.2227765630 -0.012284395 0.0372556408 0.5627028612 0.206584693
iphonecamunc 0.750403174 -0.0101550995 -0.007638359 0.0162370821 0.0429551472 0.473266316
samsungcamunc 0.073451360 0.3161343514 0.058776666 0.0409222547 0.3914328531 0.076943199
sonycamunc -0.003064444 0.1041234759 0.376632870 -0.0019143276 -0.0065777411 0.029397327
htccamunc 0.026137801 0.0729644969 0.014249220 0.0361240923 0.1661818489 0.321523039
iphonedispos 0.052624621 -0.0065259392 -0.018121019 0.0283164282 0.0669526940 0.272586547
sonydispos -0.003826569 0.0613598274 0.252589216 -0.0015281417 -0.0016689972 0.017749083
nokiadispos -0.008202154 0.0102477757 -0.003772222 0.6502528013 -0.0041735324 0.026317276
samsungcampos sonycampos nokiacampos htccampos iphonecamneg samsungcamneg
iphone 0.057395445 -0.004593694 -0.0084394692 0.022717373 0.490523588 0.142552547
samsunggalaxy 0.252120595 0.145969111 -0.0004004217 0.065274253 0.126062801 0.342919170
sonyxperia 0.050139994 0.396750611 -0.0042317350 0.016506652 -0.006715050 -0.004308481
nokialumina 0.009298654 -0.002754318 0.7004150055 0.021295246 0.063245264 0.009546079
googleandroid 0.315487421 -0.000205677 0.0032844953 0.148094852 0.391802162 0.711402608
iphonecampos 0.062438375 0.045008555 0.0308169399 0.623911948 0.541339970 0.117450549
samsungcampos 1.000000000 0.145429114 0.0148603600 0.090099060 0.206019550 0.608840260
sonycampos 0.145429114 1.000000000 -0.0018355589 0.058852134 0.013254078 0.032897238
nokiacampos 0.014860360 -0.001835559 1.0000000000 0.017761487 0.053371499 0.016046872
htccampos 0.090099060 0.058852134 0.0177614870 1.000000000 0.206896901 0.171078711
iphonecamneg 0.206019550 0.013254078 0.0533714992 0.206896901 1.000000000 0.468562960
samsungcamneg 0.608840260 0.032897238 0.0160468719 0.171078711 0.468562960 1.000000000
sonycamneg 0.053984838 0.408990574 -0.0008189489 0.016196483 0.065597700 0.072369704
htccamneg 0.295428414 0.013568090 0.0305012877 0.450715453 0.507638049 0.661017063
iphonecamunc 0.028875432 0.016442392 0.0186364023 0.163480446 0.643460020 0.061619492
samsungcamunc 0.814799293 0.164043120 0.0580378785 0.113420510 0.262665362 0.703837836
sonycamunc 0.098836218 0.528452314 -0.0012757645 0.037328532 0.034024096 0.050762079
htccamunc 0.104495332 0.056574443 0.0285796837 0.656659442 0.238011225 0.200439201
iphonedispos 0.039427134 0.019616805 0.0292070224 0.129812956 0.261983142 0.079343436
sonydispos 0.058122227 0.404993025 -0.0010183988 0.022015622 0.015890753 0.037424924
nokiadispos 0.038370716 -0.001636240 0.8708585691 0.014629187 0.050013170 0.042391084
sonycamneg htccamneg iphonecamunc samsungcamunc sonycamunc htccamunc iphonedispos
iphone -0.0018300621 0.104612765 0.750403174 0.07345136 -0.0030644436 0.026137801 0.052624621
samsunggalaxy 0.0318207535 0.222776563 -0.010155099 0.31613435 0.1041234759 0.072964497 -0.006525939
sonyxperia 0.3452957914 -0.012284395 -0.007638359 0.05877667 0.3766328700 0.014249220 -0.018121019
nokialumina -0.0012288604 0.037255641 0.016237082 0.04092225 -0.0019143276 0.036124092 0.028316428
googleandroid 0.0135394518 0.562702861 0.042955147 0.39143285 -0.0065777411 0.166181849 0.066952694
iphonecampos 0.0199942953 0.206584693 0.473266316 0.07694320 0.0293973265 0.321523039 0.272586547
samsungcampos 0.0539848377 0.295428414 0.028875432 0.81479929 0.0988362178 0.104495332 0.039427134
sonycampos 0.4089905736 0.013568090 0.016442392 0.16404312 0.5284523138 0.056574443 0.019616805
nokiacampos -0.0008189489 0.030501288 0.018636402 0.05803788 -0.0012757645 0.028579684 0.029207022
htccampos 0.0161964828 0.450715453 0.163480446 0.11342051 0.0373285316 0.656659442 0.129812956
iphonecamneg 0.0655977000 0.507638049 0.643460020 0.26266536 0.0340240958 0.238011225 0.261983142
samsungcamneg 0.0723697042 0.661017063 0.061619492 0.70383784 0.0507620794 0.200439201 0.079343436
sonycamneg 1.0000000000 0.038003476 0.025059285 0.12861974 0.6030047959 0.036701616 0.024827910
htccamneg 0.0380034764 1.000000000 0.112118301 0.37440485 0.0247482526 0.661111712 0.098294710
iphonecamunc 0.0250592852 0.112118301 1.000000000 0.05614601 0.0473068441 0.171847066 0.209007616
samsungcamunc 0.1286197355 0.374404849 0.056146005 1.00000000 0.2879321571 0.178753184 0.067375292
sonycamunc 0.6030047959 0.024748253 0.047306844 0.28793216 1.0000000000 0.108464499 0.025677953
htccamunc 0.0367016156 0.661111712 0.171847066 0.17875318 0.1084644987 1.000000000 0.093889706
iphonedispos 0.0248279099 0.098294710 0.209007616 0.06737529 0.0256779528 0.093889706 1.000000000
sonydispos 0.3372117408 0.020739500 0.012209066 0.10970557 0.4434927247 0.037799552 0.023609805
nokiadispos -0.0007300214 0.025720853 0.015624798 0.14095720 -0.0011372326 0.024518076 0.034499138
sonydispos nokiadispos htcdispos iphonedisneg sonydisneg htcdisneg
iphone -0.0038265686 -0.0082021539 0.007124916 0.175572621 -0.0027774491 0.0852733380
samsunggalaxy 0.0613598274 0.0102477757 0.024838634 0.017824130 0.0067858231 0.1888214611
sonyxperia 0.2525892158 -0.0037722221 0.003299105 -0.013589598 0.1632854243 -0.0021376810
nokialumina -0.0015281417 0.6502528013 0.010553720 0.023741675 -0.0006439773 0.0442224554
googleandroid -0.0016689972 -0.0041735324 0.057551786 0.121820615 0.0002154346 0.4470127937
iphonecampos 0.0177490834 0.0263172760 0.067428991 0.148650674 0.0028844028 0.1101016903
samsungcampos 0.0581222272 0.0383707155 0.032923476 0.065278833 0.0112361693 0.2384253937
sonycampos 0.4049930253 -0.0016362405 0.016457151 0.006716763 0.1318919118 0.0376238338
nokiacampos -0.0010183988 0.8708585691 0.008848525 0.022170003 -0.0004291655 0.0358414565
htccampos 0.0220156223 0.0146291869 0.140446973 0.041086089 0.0038024800 0.2704740233
iphonecamneg 0.0158907533 0.0500131699 0.055439580 0.346878956 0.0095879433 0.3747072961
samsungcamneg 0.0374249243 0.0423910836 0.065595998 0.152069590 0.0148796562 0.5213698250
sonycamneg 0.3372117408 -0.0007300214 0.015147863 0.040038521 0.1773804280 0.0827231553
htccamneg 0.0207394998 0.0257208533 0.135171608 0.151600245 0.0090239969 0.7289485078
iphonecamunc 0.0122090661 0.0156247976 0.023328496 0.253253711 0.0036413799 0.0798953676
samsungcamunc 0.1097055700 0.1409571952 0.052953879 0.098823606 0.0309080398 0.3482934774
sonycamunc 0.4434927247 -0.0011372326 0.018745458 0.024116264 0.2388456015 0.0626007989
htccamunc 0.0377995521 0.0245180762 0.144467590 0.067470212 0.0097737393 0.4894818428
iphonedispos 0.0236098053 0.0344991383 0.042102991 0.868765387 0.0049575187 0.1639872192
sonydispos 1.0000000000 -0.0009078136 0.015831997 0.015453255 0.8972867887 0.0530167286
nokiadispos -0.0009078136 1.0000000000 0.007594766 0.026055893 -0.0003825636 0.0313410172
iphonedisunc sonydisunc nokiadisunc htcdisunc iphoneperpos samsungperpos
iphone 0.250929821 -0.0045529578 -0.0075880802 0.024321607 -0.009507666 0.05153833
samsunggalaxy -0.027879224 0.0605561472 0.0146608209 0.071745603 -0.003169429 0.24286639
sonyxperia -0.017980754 0.2954284939 -0.0032330379 0.010002504 -0.028717034 0.02091401
nokialumina 0.002681337 -0.0013832449 0.4913317753 0.021114387 0.033345479 0.01745906
googleandroid 0.017791369 -0.0047529101 -0.0035769868 0.147068090 0.106060625 0.27035494
iphonecampos 0.188310035 0.0194031753 0.0096083040 0.156063063 0.348332416 0.04522127
samsungcampos 0.012312670 0.0676677209 0.0468115176 0.086766462 0.056272259 0.79389853
sonycampos 0.007383581 0.3888036304 -0.0014023637 0.055055469 0.009152257 0.04692259
nokiacampos 0.005012367 -0.0009218353 0.7956709983 0.016341701 0.034055807 0.02570991
htccampos 0.052906939 0.0250589384 0.0066901449 0.386465078 0.242604866 0.06784995
iphonecamneg 0.299074293 0.0360182554 0.0188800916 0.160153769 0.257568960 0.17226078
samsungcamneg 0.028896976 0.0633382871 0.0518993494 0.163322433 0.130965178 0.59055643
sonycamneg 0.027124278 0.5780401304 -0.0006256755 0.075291251 0.026581472 0.05911632
htccamneg 0.044419886 0.0376151684 0.0119082844 0.474559542 0.160387571 0.24682207
iphonecamunc 0.361321734 0.0300423951 0.0057211073 0.085983009 0.190248578 0.02742929
samsungcamunc 0.039678126 0.1990538876 0.1702617047 0.188403288 0.094227056 0.76866705
sonycamunc 0.030619761 0.7174089400 -0.0009746818 0.107382431 0.018793503 0.05801798
htccamunc 0.058927289 0.0742370804 0.0118944423 0.605687511 0.157350739 0.09708254
iphonedispos 0.883026229 0.0272975223 0.0095633363 0.171822145 0.659353827 0.09270267
sonydispos 0.012680171 0.7730887035 -0.0007780549 0.057416225 0.010195474 0.03933494
nokiadispos 0.005563423 -0.0008217356 0.8846581356 0.014046221 0.037455988 0.06373464
sonyperpos nokiaperpos htcperpos iphoneperneg samsungperneg sonyperneg
iphone -0.0063266804 -0.0105089285 0.030621280 0.013863107 0.11513005 -3.624744e-03
samsunggalaxy 0.0674892485 0.0018461920 0.088288678 0.045962586 0.30355991 9.976799e-03
sonyxperia 0.2661417156 -0.0046060231 0.004676818 -0.028774205 -0.00193120 1.224067e-01
nokialumina -0.0019186794 0.7374571504 0.039112646 0.033735414 0.01735403 -9.478623e-04
googleandroid 0.0008356533 -0.0022999416 0.209414192 0.212525142 0.55808994 5.656574e-03
iphonecampos 0.0139443738 0.0211775527 0.287084655 0.151918629 0.09203003 7.034030e-03
samsungcampos 0.0473952698 0.0215811648 0.115131849 0.112507527 0.54666952 1.936570e-02
sonycampos 0.3873108356 -0.0019979103 0.021325928 0.006280325 0.03414891 1.828294e-01
nokiacampos -0.0012786647 0.8874498813 0.026395973 0.030735731 0.02620755 -6.316834e-04
htccampos 0.0133369850 0.0128322395 0.586175384 0.074396779 0.13338754 3.725933e-03
iphonecamneg 0.0177907090 0.0393376378 0.205400545 0.308875213 0.36139403 2.055952e-02
samsungcamneg 0.0377776797 0.0235101641 0.242173307 0.259390474 0.82542086 2.546329e-02
sonycamneg 0.5019700016 -0.0008913832 0.022728267 0.044671059 0.07242245 4.654221e-01
htccamneg 0.0122902322 0.0229281922 0.550676429 0.246452247 0.50956552 8.808122e-03
iphonecamunc 0.0093256533 0.0114721001 0.092072136 0.113175498 0.05507073 4.866392e-03
samsungcamunc 0.0962822917 0.0823707914 0.178769401 0.164842312 0.68441917 4.595525e-02
sonycamunc 0.3945468871 -0.0013886032 0.029162038 0.014816199 0.05073823 2.071803e-01
htccamunc 0.0264284339 0.0248269423 0.652254312 0.108741499 0.17430489 9.450643e-03
iphonedispos 0.0117148037 0.0248343768 0.125257982 0.637768430 0.14207302 4.743860e-03
sonydispos 0.5024874205 -0.0011084740 0.019854981 0.005736525 0.03695255 1.683776e-01
nokiadispos -0.0011398179 0.8594816669 0.023916199 0.031306041 0.06582436 -5.630906e-04
htcperneg iphoneperunc samsungperunc sonyperunc htcperunc iosperpos
iphone 0.075975448 -0.016037424 0.046821536 -0.0030454006 0.0114142610 -2.005936e-02
samsunggalaxy 0.178409744 -0.017389038 0.184775057 0.0374818698 0.0449278125 -5.801680e-03
sonyxperia -0.012082758 -0.028220476 0.008007867 0.1516751025 -0.0048880488 -1.100924e-02
nokialumina 0.050050803 0.020197163 0.035274122 -0.0012035022 0.0237567173 3.071887e-02
googleandroid 0.433411367 0.056676383 0.221725967 -0.0041353037 0.1096851420 -1.670167e-02
iphonecampos 0.109391575 0.187259620 0.040154370 0.0199870203 0.0672830706 -3.991076e-03
samsungcampos 0.231171524 0.031844675 0.487766617 0.0578600356 0.0613038312 1.024714e-01
sonycampos 0.009013357 0.008176179 0.053436363 0.3788118033 0.0157814464 -3.118246e-03
nokiacampos 0.033774662 0.021551381 0.049252332 -0.0008020494 0.0172606280 1.031233e-01
htccampos 0.290333302 0.059577005 0.058155116 0.0180805293 0.2536778076 -6.121324e-03
iphonecamneg 0.348535462 0.217579385 0.138709352 0.0325695539 0.1147157357 -1.222897e-02
samsungcamneg 0.511629134 0.069828121 0.389305581 0.0608367349 0.1220479586 1.100727e-01
sonycamneg 0.028101916 0.026679111 0.084782171 0.6040116918 0.0262898504 -1.276466e-03
htccamneg 0.756413279 0.099731103 0.198497720 0.0295743766 0.4253605788 -1.093431e-02
iphonecamunc 0.070524621 0.174433158 0.033915160 0.0252557290 0.0573974810 -4.920454e-03
samsungcamunc 0.326658858 0.074922026 0.602568833 0.1525424290 0.1245163314 1.290120e-01
sonycamunc 0.018662930 0.027359282 0.086087797 0.5673580082 0.0319632466 -8.902753e-04
htccamunc 0.559357663 0.113207080 0.108102559 0.0506250723 0.6015134514 -7.866478e-03
iphonedispos 0.123985116 0.665237523 0.157446323 0.0276806457 0.0918953795 2.023206e-02
sonydispos 0.016513980 0.013792215 0.054220319 0.3407659807 0.0194072718 2.539185e-02
nokiadispos 0.030598054 0.018274423 0.118563425 -0.0007149570 0.0144895062 7.954113e-02
googleperpos googleperunc iphonesentiment
iphone 0.1180083451 0.0678592347 0.014858654
samsunggalaxy 0.2460460932 0.1422517633 -0.359172760
sonyxperia -0.0084673071 -0.0079160304 -0.233169880
nokialumina 0.0065145374 0.0079987614 -0.055961769
googleandroid 0.6385807446 0.3719984702 -0.189142050
iphonecampos 0.1179019014 0.0730039141 -0.029731217
samsungcampos 0.2982813037 0.1591714963 -0.112743311
sonycampos 0.0066726659 -0.0034336603 -0.090665090
nokiacampos 0.0115643182 0.0125178481 -0.033374561
htccampos 0.1631445917 0.1000310846 -0.120434115
iphonecamneg 0.4171852771 0.2410025125 -0.083963139
samsungcamneg 0.6586436387 0.3421196381 -0.185988857
sonycamneg 0.0209042210 -0.0015319542 -0.024826403
htccamneg 0.5783247613 0.3337273887 -0.222972178
iphonecamunc 0.0769155084 0.0581386691 0.001443485
samsungcamunc 0.4173746107 0.2694315123 -0.138045912
sonycamunc -0.0038250565 -0.0023864893 -0.050326854
htccamunc 0.2233051228 0.1624307557 -0.148881468
iphonedispos 0.1655756249 0.1796863033 0.014546824
sonydispos 0.0001583647 -0.0019050522 -0.038635303
nokiadispos -0.0024269726 -0.0015142114 -0.025922378
[ reached getOption("max.print") -- omitted 25 rows ]
Removing near zero vars:
'data.frame': 46 obs. of 4 variables:
$ freqRatio : num 5.04 14.13 44.17 497.88 61.25 ...
$ percentUnique: num 0.2081 0.054 0.0385 0.0231 0.0462 ...
$ zeroVar : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
$ nzv : logi FALSE FALSE TRUE TRUE TRUE FALSE ...
int [1:35] 3 4 5 7 8 9 10 11 12 13 ...
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 12973 obs. of 11 variables:
$ iphone : num 1 1 1 1 1 41 1 1 1 1 ...
$ samsunggalaxy : num 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecampos : num 0 0 0 0 0 1 1 0 0 0 ...
$ iphonecamunc : num 0 0 0 0 0 7 1 0 0 0 ...
$ iphonedispos : num 0 0 0 0 0 1 13 0 0 0 ...
$ iphonedisneg : num 0 0 0 0 0 3 10 0 0 0 ...
$ iphonedisunc : num 0 0 0 0 0 4 9 0 0 0 ...
$ iphoneperpos : num 0 1 0 1 1 0 5 3 0 0 ...
$ iphoneperneg : num 0 0 0 0 0 0 4 1 0 0 ...
$ iphoneperunc : num 0 0 0 1 0 0 5 0 0 0 ...
$ iphonesentiment: num 0 0 0 0 0 4 4 0 0 0 ...
[1] "Final number of features after cleanup: 11"
# set.seed(123)
# iphoneSample <- iphoneDF[sample(1:nrow(iphoneDF), 1000, replace=FALSE),]
#
# # Set up rfeControl with randomforest, repeated cross validation and no updates
# ctrl <- rfeControl(functions = rfFuncs,
# method = "repeatedcv",
# repeats = 5,
# verbose = FALSE)
#
# # Use rfe and omit the response variable (attribute 59 iphonesentiment)
# rfeResults <- run_in_parallel(rfe, iphoneSample[,1:58],
# iphoneSample$iphonesentiment,
# sizes=(1:58), rfeControl=ctrl)
#
# # Get results
# rfeResults
#
# # Plot results
# plot(rfeResults, type=c("g", "o"))
# ```
# ```{r}
# df <- df_nzv
# df$iphonesentiment <- as.factor(df$iphonesentiment)

# Start from iphoneDF and convert the sentiment label to a factor.
# (This assignment used to sit at the end of the comment line above, where
# it was never executed.)
df <- iphoneDF
df$iphonesentiment <- as.factor(df$iphonesentiment)
plot_ly(df, x= ~df$iphonesentiment, type='histogram')

# Split the data: 70% of the rows (p = .70) go to training, the rest is held
# out for testing.
set.seed(90210)
dataPar <- createDataPartition(df$iphonesentiment, p = .70, list = FALSE)
train_df <- df[dataPar,]
test_df <- df[-dataPar,]

# Same split for the data frame without near-zero-variance features.
# The seed is reset so both partitions are drawn from the same random state.
df_nzv$iphonesentiment <- as.factor(df_nzv$iphonesentiment)
set.seed(90210)
dataPar_nzv <- createDataPartition(df_nzv$iphonesentiment, p = .70, list = FALSE)
train_df_nzv <- df_nzv[dataPar_nzv,]
test_df_nzv <- df_nzv[-dataPar_nzv,]

##### Decision Tree (C5.0) #####
# Fit C5.0 on the training partition only, so the rows in test_df stay
# unseen when the model is evaluated on them later.
# NOTE(review): fitControl is not defined in this part of the file — confirm
# it is created before this point.
set.seed(90210)
system.time(dt_c50 <- run_in_parallel(train, iphonesentiment ~ ., data = train_df, method = "C5.0", trControl = fitControl))

# Train model with feature-selected dataset:
set.seed(90210)
system.time(dt_c50_clean <- run_in_parallel(train, iphonesentiment ~ ., data = train_df_nzv, method = "C5.0", trControl = fitControl))
# Recorded timing and model summary below come from an earlier run that was
# fitted on the full df_nzv (12973 samples), not on the training partition.
#    user  system elapsed
#    1.41    0.39   46.08
C5.0
12973 samples
10 predictor
6 classes: '0', '1', '2', '3', '4', '5'
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 5 times)
Summary of sample sizes: 11677, 11675, 11675, 11676, 11676, 11676, ...
Resampling results across tuning parameters:
model winnow trials Accuracy Kappa
rules FALSE 1 0.7393663 0.4858028
rules FALSE 10 0.7319048 0.4736668
rules FALSE 20 0.7319048 0.4736668
rules TRUE 1 0.7390271 0.4851496
rules TRUE 10 0.7307177 0.4719089
rules TRUE 20 0.7307177 0.4719089
tree FALSE 1 0.7394897 0.4875865
tree FALSE 10 0.7296391 0.4694632
tree FALSE 20 0.7296391 0.4694632
tree TRUE 1 0.7392276 0.4868073
tree TRUE 10 0.7300086 0.4708950
tree TRUE 20 0.7300086 0.4708950
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = tree and winnow = FALSE.
set.seed(90210)
##### Random Forest #####
# Fit on the training partition only; test_df is reserved for evaluation.
# NOTE(review): fitControl is not defined in this part of the file — confirm
# it is created before this point.
system.time(rf <- run_in_parallel(train, iphonesentiment ~ ., data = train_df, method = "rf", trControl = fitControl))
# Train model with feature-selected dataset:
library(kknn)
# kNN is fitted on the training partition and scored on the held-out rows.
set.seed(90210)
system.time(knn_model <- run_in_parallel(train.kknn, iphonesentiment ~ ., data = train_df))
pknn <- predict(knn_model, test_df)
post_knn <- postResample(pknn, test_df$iphonesentiment)

# Train model with feature-selected dataset:
set.seed(90210)
system.time(knn_model_nzv <- run_in_parallel(train.kknn, iphonesentiment ~ ., data = train_df_nzv))
pknn_nzv <- predict(knn_model_nzv, test_df_nzv)
postResample(pknn_nzv, test_df_nzv$iphonesentiment)

# Score the C5.0 and random forest models on the held-out rows.
pdt <- predict(dt_c50, test_df)
post_c50 <- postResample(pdt, test_df$iphonesentiment)
prf <- predict(rf, test_df)
post_rf <- postResample(prf, test_df$iphonesentiment)
# Creating confusion matrix
cm_dt <- confusionMatrix(pdt, test_df$iphonesentiment)
cm_dt
cmRF <- confusionMatrix(prf, test_df$iphonesentiment)
cmRF
# NOTE(review): psvm and post_svm are not defined in this part of the file —
# confirm the SVM model is trained and scored before this point.
cmsvm <- confusionMatrix(psvm, test_df$iphonesentiment)
cmsvm
cmknn <- confusionMatrix(pknn, test_df$iphonesentiment)
cmknn
# Grouped bar chart to evaluate model performance.
# One column per model and one row per metric; beside = TRUE draws the
# metrics of each model side by side instead of as one flat run of bars.
Eval <- cbind(C5.0 = post_c50, RF = post_rf, SVM = post_svm, kNN = post_knn)
barplot(Eval, beside = TRUE, main = "Model Evaluation", col = c("darkblue","red"), legend.text = rownames(Eval))